{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "1288a1e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "#from vk_download import main\n",
    "import scipy.sparse as sprs\n",
    "#import vk\n",
    "import time\n",
    "from tqdm.auto import tqdm\n",
    "import os\n",
    "import time\n",
    "#from vk.exceptions import VkAPIError\n",
    "from requests.exceptions import ConnectionError \n",
    "from requests.exceptions import ReadTimeout \n",
    "import pandas as pd\n",
    "import random\n",
    "import datetime\n",
    "from tqdm.auto import tqdm\n",
    "from threading import Thread\n",
    "from multiprocessing import Process\n",
    "import numpy as np\n",
    "from scipy.sparse import csr_matrix, lil_matrix\n",
    "import scipy.sparse as sprs\n",
    "import os\n",
    "from collections import OrderedDict\n",
    "import shutil\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import networkx as nx\n",
    "from IPython.display import clear_output"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "48bb5c83-c821-47c7-927c-11b1f3775c4d",
   "metadata": {},
   "source": [
    "# Chose city"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "287074d9-d720-43e0-b1ec-64d54e0cd017",
   "metadata": {},
   "outputs": [],
   "source": [
    "folder = 'MainCity'  # This city was used in analysis\n",
    "\n",
    "# These two cities were used to check robustness of our results\n",
    "\n",
    "#folder = 'City(1)'  # Robustness 1\n",
    "#folder = 'City(2)'  # Robustness 2"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "49264438-4302-4ffb-a76b-6fc8f97c14e9",
   "metadata": {},
   "source": [
    "# Prepare place for storing files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "0cc0fc1f-0ef4-40b4-9cc3-12e8e7f9f782",
   "metadata": {},
   "outputs": [],
   "source": [
    "folders = os.listdir()\n",
    "\n",
    "if folder not in folders:\n",
    "    os.mkdir(folder)\n",
    "    \n",
    "folders = os.listdir(folder)\n",
    "\n",
    "if 'Masks' not in folders:\n",
    "    os.mkdir(f'{folder}/Masks')\n",
    "    \n",
    "if 'Vectors' not in folders:\n",
    "    os.mkdir(f'{folder}/Vectors')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9b10ca24-435f-4160-b5fa-dffa9925eaac",
   "metadata": {},
   "source": [
    "# Download network and opinion snapshots, as well as users' attributes (age, gender) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "000c1dd1-6824-4577-9c03-1654f7675ea9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(29248, 29248)\n",
      "(29248, 29248)\n",
      "\n",
      "(29248, 3)\n",
      "(29248, 1)\n",
      "\n",
      "(29248, 3692970)\n",
      "(29248, 3692970)\n"
     ]
    }
   ],
   "source": [
    "A1 = sprs.load_npz(f'AdjMatrix(1) - {folder}.npz')\n",
    "A2 = sprs.load_npz(f'AdjMatrix(2) - {folder}.npz')\n",
    "\n",
    "print(A1.shape)\n",
    "print(A2.shape)\n",
    "\n",
    "print('')\n",
    "\n",
    "X1 = pd.read_csv(f'UsersAttributes(1) - {folder}.csv')\n",
    "X2 = pd.read_csv(f'UsersAttributes(2) - {folder}.csv')\n",
    "\n",
    "print(X1.shape)\n",
    "print(X2.shape)\n",
    "\n",
    "print('')\n",
    "\n",
    "S1 = sprs.load_npz(f'Followees(1) - {folder}.npz')\n",
    "S2 = sprs.load_npz(f'Followees(2) - {folder}.npz')\n",
    "\n",
    "\n",
    "print(S1.shape)\n",
    "print(S2.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ba297869",
   "metadata": {},
   "source": [
    "# Finding and isolating the giant connected components (time moment $ t_{1} $)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "id": "b96f3577-887c-413a-abe3-7ebf8619edaf",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The size of the largest component: 19060\n",
      "\n",
      "The number of such components: 1\n",
      "\n",
      "The second largest component: 10\n"
     ]
    }
   ],
   "source": [
    "G = nx.from_scipy_sparse_array(A1)\n",
    "components = nx.connected_components(G)\n",
    "\n",
    "components_info = []\n",
    "components_list = []\n",
    "\n",
    "for i in components:\n",
    "    \n",
    "    new_component = i\n",
    "    \n",
    "    components_info.append(len(new_component))\n",
    "    components_list.append(new_component)\n",
    "    \n",
    "components_info = np.array(components_info)\n",
    "\n",
    "LargestComponentSize = components_info.max()\n",
    "\n",
    "print(f'The size of the largest component: {LargestComponentSize}')\n",
    "print('')\n",
    "print(f'The number of such components: {len(components_info[components_info == LargestComponentSize])}')\n",
    "print('')\n",
    "print(f'The second largest component: {components_info[components_info < LargestComponentSize].max()}')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "id": "77dc7570-a777-454e-ba3d-50479c406753",
   "metadata": {},
   "outputs": [],
   "source": [
    "gcc1 = components_list[components_info.argmax()]\n",
    "\n",
    "gcc1 = np.array(list(gcc1))\n",
    "\n",
    "#len(gcc1)\n",
    "\n",
    "#gcc1"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "df138edb-af84-46d9-bd11-5db15ff9f43a",
   "metadata": {},
   "source": [
    "# Finding and isolating the giant connected components (time moment $ t_{2} $)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "id": "399e10a5-7e68-4b5d-ba77-5cd579874bc2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The size of the largest component: 18811\n",
      "\n",
      "The number of such components: 1\n",
      "\n",
      "The second largest component: 9\n"
     ]
    }
   ],
   "source": [
    "G = nx.from_scipy_sparse_array(A2)\n",
    "components = nx.connected_components(G)\n",
    "\n",
    "components_info = []\n",
    "components_list = []\n",
    "\n",
    "for i in components:\n",
    "    \n",
    "    new_component = i\n",
    "    \n",
    "    components_info.append(len(new_component))\n",
    "    components_list.append(new_component)\n",
    "    \n",
    "components_info = np.array(components_info)\n",
    "\n",
    "LargestComponentSize = components_info.max()\n",
    "\n",
    "print(f'The size of the largest component: {LargestComponentSize}')\n",
    "print('')\n",
    "print(f'The number of such components: {len(components_info[components_info == LargestComponentSize])}')\n",
    "print('')\n",
    "print(f'The second largest component: {components_info[components_info < LargestComponentSize].max()}')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "id": "def0d8ec-693d-45d0-8398-3402c67546b2",
   "metadata": {},
   "outputs": [],
   "source": [
    "gcc2 = components_list[components_info.argmax()]\n",
    "\n",
    "gcc2 = np.array(list(gcc1))\n",
    "\n",
    "#len(gcc1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "id": "5c9d99f8-af9a-4e35-8cd9-1e3b5f3372b7",
   "metadata": {},
   "outputs": [],
   "source": [
    "for el in gcc1:\n",
    "    \n",
    "    if el not in gcc2:\n",
    "        \n",
    "        print('!')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "53ca7c5d-9e8f-4be2-bbd5-61884d2b21c7",
   "metadata": {},
   "source": [
    "# All nodes from the giant connected component of the graph $ G (t_{1}) $ lie in the giant connected component of the graph $ G (t_{2}) $\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "id": "cf656acd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 105,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "A2_gcc1 = A2[gcc1, :][:, gcc1]\n",
    "\n",
    "G = nx.from_scipy_sparse_array(A2_gcc1)\n",
    "\n",
    "nx.is_connected(G)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f981789f-566e-42ff-8553-3a98a23193b0",
   "metadata": {},
   "source": [
    "# If we exctract from the graph $ G (t_{2}) $ the subgraph built upon the giant connected component of the graph $ G (t_{1}), $ the resulting network will not be connected "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c40ee547-cff6-423c-8226-8d60e7627ea1",
   "metadata": {},
   "source": [
    "# In what follows, we can safely focus on the giant component from the graph $ G (t_{1}) $"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "id": "14a53870",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(19060, 19060) (19060, 19060)\n"
     ]
    }
   ],
   "source": [
    "A1 = A1[gcc1, :][:, gcc1]\n",
    "A2 = A2[gcc1, :][:, gcc1]\n",
    "\n",
    "print(A1.shape, A2.shape)\n",
    "\n",
    "sprs.save_npz(f'{folder}/AdjMatrix(1)Giant.npz', A1)\n",
    "sprs.save_npz(f'{folder}/AdjMatrix(2)Giant.npz', A2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "id": "43fc08a1-d2ed-4387-8d44-a1c618bcf919",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>sex</th>\n",
       "      <th>prob_1</th>\n",
       "      <th>city_name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>47.0</td>\n",
       "      <td>2</td>\n",
       "      <td>0.604357</td>\n",
       "      <td>Курган</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>57.0</td>\n",
       "      <td>2</td>\n",
       "      <td>0.778843</td>\n",
       "      <td>Курган</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>37.0</td>\n",
       "      <td>2</td>\n",
       "      <td>0.443861</td>\n",
       "      <td>Курган</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>33.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.505465</td>\n",
       "      <td>Курган</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>46.0</td>\n",
       "      <td>2</td>\n",
       "      <td>0.495546</td>\n",
       "      <td>Курган</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    age  sex    prob_1 city_name\n",
       "0  47.0    2  0.604357    Курган\n",
       "1  57.0    2  0.778843    Курган\n",
       "2  37.0    2  0.443861    Курган\n",
       "3  33.0    1  0.505465    Курган\n",
       "4  46.0    2  0.495546    Курган"
      ]
     },
     "execution_count": 107,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X1.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "id": "df20d26b",
   "metadata": {},
   "outputs": [],
   "source": [
    "X1[X1.index.isin(gcc1)].to_csv(f'{folder}/UsersAttributes(1)Giant.csv', index=False)\n",
    "X2[X2.index.isin(gcc1)].to_csv(f'{folder}/UsersAttributes(2)Giant.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "id": "c34e4f49-430c-4336-adb3-a92c1c8307cf",
   "metadata": {},
   "outputs": [],
   "source": [
    "#gcc1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "id": "fc9dca3a-2050-4b0c-99ea-eff266613c61",
   "metadata": {},
   "outputs": [],
   "source": [
    "S1 = S1[gcc1]\n",
    "S2 = S2[gcc1]\n",
    "\n",
    "#print(S1.shape)\n",
    "\n",
    "sprs.save_npz(f'{folder}/Followees(1)Giant.npz', S1)\n",
    "sprs.save_npz(f'{folder}/Followees(2)Giant.npz', S2)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6436246b",
   "metadata": {},
   "source": [
    "# Let us prepare a one table that will include all information of users' attributes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "id": "add81c38",
   "metadata": {},
   "outputs": [],
   "source": [
    "#folder = 'All cities'\n",
    "\n",
    "A1 = sprs.load_npz(f'{folder}/AdjMatrix(1)Giant.npz')\n",
    "A2 = sprs.load_npz(f'{folder}/AdjMatrix(2)Giant.npz')\n",
    "\n",
    "X1 = pd.read_csv(f'{folder}/UsersAttributes(1)Giant.csv')\n",
    "X2 = pd.read_csv(f'{folder}/UsersAttributes(2)Giant.csv')\n",
    "\n",
    "S1 = sprs.load_npz(f'{folder}/Followees(1)Giant.npz')\n",
    "S2 = sprs.load_npz(f'{folder}/Followees(2)Giant.npz')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "id": "1cc359bb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>sex</th>\n",
       "      <th>prob_1</th>\n",
       "      <th>city_name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>47.0</td>\n",
       "      <td>2</td>\n",
       "      <td>0.604357</td>\n",
       "      <td>Курган</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>57.0</td>\n",
       "      <td>2</td>\n",
       "      <td>0.778843</td>\n",
       "      <td>Курган</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>37.0</td>\n",
       "      <td>2</td>\n",
       "      <td>0.443861</td>\n",
       "      <td>Курган</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>33.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.505465</td>\n",
       "      <td>Курган</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>46.0</td>\n",
       "      <td>2</td>\n",
       "      <td>0.495546</td>\n",
       "      <td>Курган</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    age  sex    prob_1 city_name\n",
       "0  47.0    2  0.604357    Курган\n",
       "1  57.0    2  0.778843    Курган\n",
       "2  37.0    2  0.443861    Курган\n",
       "3  33.0    1  0.505465    Курган\n",
       "4  46.0    2  0.495546    Курган"
      ]
     },
     "execution_count": 112,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# users' attributes and opinions at time t1\n",
    "\n",
    "X1.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "id": "b3a054c0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>prob_1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.605188</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.782339</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.444395</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.518122</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.495679</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     prob_1\n",
       "0  0.605188\n",
       "1  0.782339\n",
       "2  0.444395\n",
       "3  0.518122\n",
       "4  0.495679"
      ]
     },
     "execution_count": 113,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# users' opinions at time t2\n",
    "\n",
    "X2.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "id": "2f912670",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>sex</th>\n",
       "      <th>OpinionBefore</th>\n",
       "      <th>OpinionAfter</th>\n",
       "      <th>FriendsBefore</th>\n",
       "      <th>FriendsAfter</th>\n",
       "      <th>NewFriends</th>\n",
       "      <th>DeletedFriends</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>47.0</td>\n",
       "      <td>2</td>\n",
       "      <td>0.604357</td>\n",
       "      <td>0.605188</td>\n",
       "      <td>28.0</td>\n",
       "      <td>25</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>57.0</td>\n",
       "      <td>2</td>\n",
       "      <td>0.778843</td>\n",
       "      <td>0.782339</td>\n",
       "      <td>106.0</td>\n",
       "      <td>92</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>37.0</td>\n",
       "      <td>2</td>\n",
       "      <td>0.443861</td>\n",
       "      <td>0.444395</td>\n",
       "      <td>21.0</td>\n",
       "      <td>20</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>33.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.505465</td>\n",
       "      <td>0.518122</td>\n",
       "      <td>9.0</td>\n",
       "      <td>6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>46.0</td>\n",
       "      <td>2</td>\n",
       "      <td>0.495546</td>\n",
       "      <td>0.495679</td>\n",
       "      <td>107.0</td>\n",
       "      <td>102</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    age  sex  OpinionBefore  OpinionAfter  FriendsBefore  FriendsAfter  \\\n",
       "0  47.0    2       0.604357      0.605188           28.0            25   \n",
       "1  57.0    2       0.778843      0.782339          106.0            92   \n",
       "2  37.0    2       0.443861      0.444395           21.0            20   \n",
       "3  33.0    1       0.505465      0.518122            9.0             6   \n",
       "4  46.0    2       0.495546      0.495679          107.0           102   \n",
       "\n",
       "   NewFriends  DeletedFriends  \n",
       "0         0.0             3.0  \n",
       "1         1.0            15.0  \n",
       "2         0.0             1.0  \n",
       "3         0.0             3.0  \n",
       "4         0.0             5.0  "
      ]
     },
     "execution_count": 114,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = pd.DataFrame({'age' : X1['age'], \n",
    "                  'sex' : X1['sex'], \n",
    "                  'OpinionBefore' : X1['prob_1'], \n",
    "                  'OpinionAfter' : X2['prob_1']})\n",
    "\n",
    "del(X1, X2)\n",
    "\n",
    "delta_A = A2 - A1\n",
    "delta_A[delta_A < 0] = 0\n",
    "new_connections_number = np.array(delta_A.sum(axis=1)).ravel()\n",
    "\n",
    "delta_A = A2 - A1\n",
    "delta_A = delta_A*(-1)\n",
    "delta_A[delta_A < 0] = 0\n",
    "deleted_connections_number = np.array(delta_A.sum(axis=1)).ravel()\n",
    "\n",
    "X['FriendsBefore'] = np.array(A1.sum(axis=1)).ravel()\n",
    "X['FriendsAfter'] = np.array(A2.sum(axis=1)).ravel()\n",
    "X['NewFriends'] = new_connections_number\n",
    "X['DeletedFriends'] = deleted_connections_number\n",
    "\n",
    "X.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "id": "c715aba5-c4be-46e9-be50-7fb8150782d5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>sex</th>\n",
       "      <th>OpinionBefore</th>\n",
       "      <th>OpinionAfter</th>\n",
       "      <th>FriendsBefore</th>\n",
       "      <th>FriendsAfter</th>\n",
       "      <th>NewFriends</th>\n",
       "      <th>DeletedFriends</th>\n",
       "      <th>FolloweesBefore</th>\n",
       "      <th>FolloweesAfter</th>\n",
       "      <th>NewFollowees</th>\n",
       "      <th>DeletedFollowees</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>47.0</td>\n",
       "      <td>2</td>\n",
       "      <td>0.604357</td>\n",
       "      <td>0.605188</td>\n",
       "      <td>28.0</td>\n",
       "      <td>25</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>65.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>57.0</td>\n",
       "      <td>2</td>\n",
       "      <td>0.778843</td>\n",
       "      <td>0.782339</td>\n",
       "      <td>106.0</td>\n",
       "      <td>92</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>91.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>37.0</td>\n",
       "      <td>2</td>\n",
       "      <td>0.443861</td>\n",
       "      <td>0.444395</td>\n",
       "      <td>21.0</td>\n",
       "      <td>20</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>52.0</td>\n",
       "      <td>59.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>33.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.505465</td>\n",
       "      <td>0.518122</td>\n",
       "      <td>9.0</td>\n",
       "      <td>6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>38.0</td>\n",
       "      <td>41.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>46.0</td>\n",
       "      <td>2</td>\n",
       "      <td>0.495546</td>\n",
       "      <td>0.495679</td>\n",
       "      <td>107.0</td>\n",
       "      <td>102</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    age  sex  OpinionBefore  OpinionAfter  FriendsBefore  FriendsAfter  \\\n",
       "0  47.0    2       0.604357      0.605188           28.0            25   \n",
       "1  57.0    2       0.778843      0.782339          106.0            92   \n",
       "2  37.0    2       0.443861      0.444395           21.0            20   \n",
       "3  33.0    1       0.505465      0.518122            9.0             6   \n",
       "4  46.0    2       0.495546      0.495679          107.0           102   \n",
       "\n",
       "   NewFriends  DeletedFriends  FolloweesBefore  FolloweesAfter  NewFollowees  \\\n",
       "0         0.0             3.0             65.0            68.0           6.0   \n",
       "1         1.0            15.0             91.0           100.0          14.0   \n",
       "2         0.0             1.0             52.0            59.0          11.0   \n",
       "3         0.0             3.0             38.0            41.0           6.0   \n",
       "4         0.0             5.0             18.0            20.0           2.0   \n",
       "\n",
       "   DeletedFollowees  \n",
       "0               3.0  \n",
       "1               5.0  \n",
       "2               4.0  \n",
       "3               3.0  \n",
       "4               0.0  "
      ]
     },
     "execution_count": 115,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "deltaS = S2 - S1\n",
    "deltaS[deltaS < 0] = 0\n",
    "NewFollowees = np.array(deltaS.sum(axis=1)).ravel()\n",
    "\n",
    "deltaS = S2 - S1\n",
    "deltaS = deltaS*(-1)\n",
    "deltaS[deltaS < 0] = 0\n",
    "DeletedFollowees = np.array(deltaS.sum(axis=1)).ravel()\n",
    "\n",
    "X['FolloweesBefore'] = np.array(S1.sum(axis=1)).ravel() \n",
    "X['FolloweesAfter'] = np.array(S2.sum(axis=1)).ravel() \n",
    "X['NewFollowees'] = NewFollowees\n",
    "X['DeletedFollowees'] = DeletedFollowees\n",
    "\n",
    "X.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "id": "39417612-2063-44b0-99a8-6c7be6a5de14",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>sex</th>\n",
       "      <th>OpinionBefore</th>\n",
       "      <th>OpinionAfter</th>\n",
       "      <th>FriendsBefore</th>\n",
       "      <th>FriendsAfter</th>\n",
       "      <th>NewFriends</th>\n",
       "      <th>DeletedFriends</th>\n",
       "      <th>FolloweesBefore</th>\n",
       "      <th>FolloweesAfter</th>\n",
       "      <th>NewFollowees</th>\n",
       "      <th>DeletedFollowees</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>47</td>\n",
       "      <td>2</td>\n",
       "      <td>0.604357</td>\n",
       "      <td>0.605188</td>\n",
       "      <td>28.0</td>\n",
       "      <td>25</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>65</td>\n",
       "      <td>68</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>57</td>\n",
       "      <td>2</td>\n",
       "      <td>0.778843</td>\n",
       "      <td>0.782339</td>\n",
       "      <td>106.0</td>\n",
       "      <td>92</td>\n",
       "      <td>1</td>\n",
       "      <td>15</td>\n",
       "      <td>91</td>\n",
       "      <td>100</td>\n",
       "      <td>14</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>37</td>\n",
       "      <td>2</td>\n",
       "      <td>0.443861</td>\n",
       "      <td>0.444395</td>\n",
       "      <td>21.0</td>\n",
       "      <td>20</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>52</td>\n",
       "      <td>59</td>\n",
       "      <td>11</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>33</td>\n",
       "      <td>1</td>\n",
       "      <td>0.505465</td>\n",
       "      <td>0.518122</td>\n",
       "      <td>9.0</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>38</td>\n",
       "      <td>41</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>46</td>\n",
       "      <td>2</td>\n",
       "      <td>0.495546</td>\n",
       "      <td>0.495679</td>\n",
       "      <td>107.0</td>\n",
       "      <td>102</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>18</td>\n",
       "      <td>20</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   age  sex  OpinionBefore  OpinionAfter  FriendsBefore  FriendsAfter  \\\n",
       "0   47    2       0.604357      0.605188           28.0            25   \n",
       "1   57    2       0.778843      0.782339          106.0            92   \n",
       "2   37    2       0.443861      0.444395           21.0            20   \n",
       "3   33    1       0.505465      0.518122            9.0             6   \n",
       "4   46    2       0.495546      0.495679          107.0           102   \n",
       "\n",
       "   NewFriends  DeletedFriends  FolloweesBefore  FolloweesAfter  NewFollowees  \\\n",
       "0           0               3               65              68             6   \n",
       "1           1              15               91             100            14   \n",
       "2           0               1               52              59            11   \n",
       "3           0               3               38              41             6   \n",
       "4           0               5               18              20             2   \n",
       "\n",
       "   DeletedFollowees  \n",
       "0                 3  \n",
       "1                 5  \n",
       "2                 4  \n",
       "3                 3  \n",
       "4                 0  "
      ]
     },
     "execution_count": 116,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X['age'] = X['age'].astype(int)\n",
    "\n",
    "X['FriendsAfter'] = X['FriendsAfter'].astype(int)\n",
    "X['NewFriends'] = X['NewFriends'].astype(int)\n",
    "X['DeletedFriends'] = X['DeletedFriends'].astype(int)\n",
    "X['FolloweesBefore'] = X['FolloweesBefore'].astype(int)\n",
    "X['FolloweesAfter'] = X['FolloweesAfter'].astype(int)\n",
    "X['NewFollowees'] = X['NewFollowees'].astype(int)\n",
    "X['DeletedFollowees'] = X['DeletedFollowees'].astype(int)\n",
    "\n",
    "\n",
    "X.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "id": "11bb7e8f",
   "metadata": {},
   "outputs": [],
   "source": [
    "X.to_csv(f'{folder}/UsersAttributesGiant.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "73d4ed75",
   "metadata": {},
   "source": [
    "# For each user, add information regarding opinions of their friends' (measured as the average opinion of the neighborhood)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "id": "0208de73",
   "metadata": {},
   "outputs": [],
   "source": [
    "X = pd.read_csv(f'{folder}/UsersAttributesGiant.csv')\n",
    "\n",
    "\n",
    "A1 = sprs.load_npz(f'{folder}/AdjMatrix(1)Giant.npz')\n",
    "A2 = sprs.load_npz(f'{folder}/AdjMatrix(2)Giant.npz')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "id": "8d3526cd-455e-4aad-9a3e-a853172a2939",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1809dd713e144a45bebff693031d3fa4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/19060 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>sex</th>\n",
       "      <th>OpinionBefore</th>\n",
       "      <th>OpinionAfter</th>\n",
       "      <th>FriendsBefore</th>\n",
       "      <th>FriendsAfter</th>\n",
       "      <th>NewFriends</th>\n",
       "      <th>DeletedFriends</th>\n",
       "      <th>FolloweesBefore</th>\n",
       "      <th>FolloweesAfter</th>\n",
       "      <th>NewFollowees</th>\n",
       "      <th>DeletedFollowees</th>\n",
       "      <th>FriendsOpinionBeforeAvg</th>\n",
       "      <th>FriendsOpinionAfterAvg</th>\n",
       "      <th>FriendsOpinionBeforeStd</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>47</td>\n",
       "      <td>2</td>\n",
       "      <td>0.60</td>\n",
       "      <td>0.61</td>\n",
       "      <td>28.0</td>\n",
       "      <td>25</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>65</td>\n",
       "      <td>68</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>0.59</td>\n",
       "      <td>0.59</td>\n",
       "      <td>0.18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>57</td>\n",
       "      <td>2</td>\n",
       "      <td>0.78</td>\n",
       "      <td>0.78</td>\n",
       "      <td>106.0</td>\n",
       "      <td>92</td>\n",
       "      <td>1</td>\n",
       "      <td>15</td>\n",
       "      <td>91</td>\n",
       "      <td>100</td>\n",
       "      <td>14</td>\n",
       "      <td>5</td>\n",
       "      <td>0.51</td>\n",
       "      <td>0.51</td>\n",
       "      <td>0.19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>37</td>\n",
       "      <td>2</td>\n",
       "      <td>0.44</td>\n",
       "      <td>0.44</td>\n",
       "      <td>21.0</td>\n",
       "      <td>20</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>52</td>\n",
       "      <td>59</td>\n",
       "      <td>11</td>\n",
       "      <td>4</td>\n",
       "      <td>0.41</td>\n",
       "      <td>0.42</td>\n",
       "      <td>0.22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>33</td>\n",
       "      <td>1</td>\n",
       "      <td>0.51</td>\n",
       "      <td>0.52</td>\n",
       "      <td>9.0</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>38</td>\n",
       "      <td>41</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>0.46</td>\n",
       "      <td>0.42</td>\n",
       "      <td>0.11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>46</td>\n",
       "      <td>2</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.50</td>\n",
       "      <td>107.0</td>\n",
       "      <td>102</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>18</td>\n",
       "      <td>20</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0.46</td>\n",
       "      <td>0.46</td>\n",
       "      <td>0.21</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   age  sex  OpinionBefore  OpinionAfter  FriendsBefore  FriendsAfter  \\\n",
       "0   47    2           0.60          0.61           28.0            25   \n",
       "1   57    2           0.78          0.78          106.0            92   \n",
       "2   37    2           0.44          0.44           21.0            20   \n",
       "3   33    1           0.51          0.52            9.0             6   \n",
       "4   46    2           0.50          0.50          107.0           102   \n",
       "\n",
       "   NewFriends  DeletedFriends  FolloweesBefore  FolloweesAfter  NewFollowees  \\\n",
       "0           0               3               65              68             6   \n",
       "1           1              15               91             100            14   \n",
       "2           0               1               52              59            11   \n",
       "3           0               3               38              41             6   \n",
       "4           0               5               18              20             2   \n",
       "\n",
       "   DeletedFollowees  FriendsOpinionBeforeAvg  FriendsOpinionAfterAvg  \\\n",
       "0                 3                     0.59                    0.59   \n",
       "1                 5                     0.51                    0.51   \n",
       "2                 4                     0.41                    0.42   \n",
       "3                 3                     0.46                    0.42   \n",
       "4                 0                     0.46                    0.46   \n",
       "\n",
       "   FriendsOpinionBeforeStd  \n",
       "0                     0.18  \n",
       "1                     0.19  \n",
       "2                     0.22  \n",
       "3                     0.11  \n",
       "4                     0.21  "
      ]
     },
     "execution_count": 119,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "FrOpBeforeAvg = []\n",
    "FrOpAfterAvg = []\n",
    "FrOpBeforeStd = []\n",
    "\n",
    "\n",
    "\n",
    "for i in tqdm(range(X.shape[0])):\n",
    "    \n",
    "    FrBefore = A1.indices[A1.indptr[i]:A1.indptr[i+1]]\n",
    "    \n",
    "    XFrBefore = X.iloc[FrBefore, :]\n",
    "    \n",
    "    \n",
    "    FrOpBeforeAvg.append(XFrBefore['OpinionBefore'].mean())\n",
    "    FrOpAfterAvg.append(XFrBefore['OpinionAfter'].mean())        ###!!!!!\n",
    "    FrOpBeforeStd.append(XFrBefore['OpinionBefore'].std())\n",
    "    \n",
    "    \n",
    "    \n",
    "X['FriendsOpinionBeforeAvg'] = FrOpBeforeAvg\n",
    "X['FriendsOpinionAfterAvg'] = FrOpAfterAvg\n",
    "X['FriendsOpinionBeforeStd'] = FrOpBeforeStd\n",
    "\n",
    "X.round(2).to_csv(f'{folder}/UsersAttributesGiant.csv', index=False)\n",
    "\n",
    "X.round(2).head()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "495a45b8",
   "metadata": {},
   "source": [
    "# Create the table of observations - go through all pairs of vertices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "id": "9d514c02-ff95-4aef-be26-841104967e4b",
   "metadata": {},
   "outputs": [],
   "source": [
    "X = pd.read_csv(f'{folder}/UsersAttributesGiant.csv')\n",
    "\n",
    "#print(X.shape)\n",
    "\n",
    "A1 = sprs.load_npz(f'{folder}/AdjMatrix(1)Giant.npz')\n",
    "A2 = sprs.load_npz(f'{folder}/AdjMatrix(2)Giant.npz')\n",
    "\n",
    "#print(A1.shape)\n",
    "#print(A2.shape)\n",
    "\n",
    "S1 = sprs.load_npz(f'{folder}/Followees(1)Giant.npz')\n",
    "S2 = sprs.load_npz(f'{folder}/Followees(2)Giant.npz')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "313e31c3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "83735729 / 176917455\n"
     ]
    }
   ],
   "source": [
    "ColumnsNames = ['ageI', 'ageJ', 'sexI', 'sexJ',\n",
    "                'OpinionBeforeI', 'OpinionAfterI', 'OpinionBeforeJ', 'OpinionAfterJ', \n",
    "                #'FolloweesChangesI', 'FolloweesChangesJ',\n",
    "                'FriendsBeforeNumI', 'FriendsBeforeNumJ', 'FolloweesBeforeNumI', 'FolloweesBeforeNumJ', \n",
    "                'CommFriendsBefore', \n",
    "                #'CommFriendsAfter', \n",
    "                'CommFolloweesBefore', \n",
    "                #'CommFolloweesAfter',\n",
    "                'Target']\n",
    "\n",
    "observations = []\n",
    "\n",
    "n_possible_ties = int(X.shape[0]*(X.shape[0]-1)/2)  # total amount of ties in the graph - n(n+1)/2\n",
    "\n",
    "counter = 0\n",
    "#counter_temp = 0\n",
    "iteration = 1\n",
    "\n",
    "for i in range(X.shape[0]):  # go through each user\n",
    "    \n",
    "    clear_output(wait=True)\n",
    "    print(f'{counter} / {n_possible_ties}')\n",
    "    \n",
    "    # information on the focal user's friends\n",
    "    \n",
    "    FriendsBeforeI = A1.indices[A1.indptr[i]:A1.indptr[i+1]]\n",
    "    FriendsAfterI = A2.indices[A2.indptr[i]:A2.indptr[i+1]]\n",
    "\n",
    "    SetFriendsBeforeI = set(FriendsBeforeI)\n",
    "    SetFriendsAfterI = set(FriendsAfterI)\n",
    "    \n",
    "    FriendsBeforeNumI = X['FriendsBefore'][i]\n",
    "    #NewFriendsNum = X['NewFriends'][i]\n",
    "    #DeletedFriendsNum = X['DeletedFriends'][i]\n",
    "    \n",
    "    # information on the focal user's demographics\n",
    "    \n",
    "    ageI = X['age'][i]\n",
    "    sexI = X['sex'][i]\n",
    "    \n",
    "    # information on the opinions of the focal user before and after\n",
    "    \n",
    "    OpinionBeforeI = X['OpinionBefore'][i]\n",
    "    OpinionAfterI = X['OpinionAfter'][i]\n",
    "    \n",
    "    # information on the focal user's followees\n",
    "    \n",
    "    FolloweesBeforeI = S1.indices[S1.indptr[i]:S1.indptr[i+1]]\n",
    "    FolloweesAfterI = S2.indices[S2.indptr[i]:S2.indptr[i+1]]\n",
    "    \n",
    "    SetFolloweesBeforeI = set(FolloweesBeforeI)\n",
    "    SetFolloweesAfterI = set(FolloweesAfterI)\n",
    "    \n",
    "    FolloweesBeforeNumI = X['FolloweesBefore'][i]\n",
    "    NewFolloweesNumI = X['NewFollowees'][i]\n",
    "    DeletedFolloweesNumI = X['DeletedFollowees'][i]\n",
    "    \n",
    "    \n",
    "    for j in range(i+1, X.shape[0]):  # go through all their online friends (avoiding going through the same edge twice)\n",
    "        \n",
    "        counter = counter + 1\n",
    "        #counter_temp += 1\n",
    "        #clear_output(wait=True)\n",
    "        #print(f'{counter} / {n_possible_ties}')\n",
    "        \n",
    "        # information on the friend's friends\n",
    "        \n",
    "        FriendsBeforeJ = A1.indices[A1.indptr[j]:A1.indptr[j+1]]\n",
    "        FriendsAfterJ = A2.indices[A2.indptr[j]:A2.indptr[j+1]]\n",
    "\n",
    "        SetFriendsBeforeJ = set(FriendsBeforeJ)\n",
    "        SetFriendsAfterJ = set(FriendsAfterJ)\n",
    "        \n",
    "        FriendsBeforeNumJ = X['FriendsBefore'][j]\n",
    "        \n",
    "        # number of common friends\n",
    "        \n",
    "        CommFriendsBefore = len(SetFriendsBeforeI & SetFriendsBeforeJ)\n",
    "        CommFriendsAfter = len(SetFriendsAfterI & SetFriendsAfterJ)\n",
    "        \n",
    "        # information on the friend's demographics\n",
    "\n",
    "        ageJ = X['age'][j]\n",
    "        sexJ = X['sex'][j]\n",
    "\n",
    "        # information on the opinions of the focal user before and after\n",
    "\n",
    "        OpinionBeforeJ = X['OpinionBefore'][j]\n",
    "        OpinionAfterJ = X['OpinionAfter'][j]\n",
    "\n",
    "        # information on the friend's followees\n",
    "\n",
    "        FolloweesBeforeJ = S1.indices[S1.indptr[j]:S1.indptr[j+1]]\n",
    "        FolloweesAfterJ = S2.indices[S2.indptr[j]:S2.indptr[j+1]]\n",
    "\n",
    "        SetFolloweesBeforeJ = set(FolloweesBeforeJ)\n",
    "        SetFolloweesAfterJ = set(FolloweesAfterJ)\n",
    "\n",
    "        FolloweesBeforeNumJ = X['FolloweesBefore'][j]\n",
    "        NewFolloweesNumJ = X['NewFollowees'][j]\n",
    "        DeletedFolloweesNumJ = X['DeletedFollowees'][j]\n",
    "        \n",
    "        CommFolloweesBefore = len(SetFolloweesBeforeI & SetFolloweesBeforeJ)\n",
    "        CommFolloweesAfter = len(SetFolloweesAfterI & SetFolloweesAfterJ)        \n",
    "        \n",
    "        if (A1[i, j] == 0) & (A2[i, j] == 0):\n",
    "        \n",
    "            target = 1  # no no\n",
    "        \n",
    "        elif (A1[i, j] == 0) & (A2[i, j] == 1):\n",
    "        \n",
    "            target = 2  # no yes\n",
    "            \n",
    "        elif (A1[i, j] == 1) & (A2[i, j] == 0):\n",
    "        \n",
    "            target = 3  # yes no\n",
    "            \n",
    "        elif (A1[i, j] == 1) & (A2[i, j] == 1):\n",
    "        \n",
    "            target = 4  # yes yes\n",
    "            \n",
    "        else:\n",
    "            \n",
    "            print('!!')\n",
    "            \n",
    "            break\n",
    "        \n",
    "        new_item = [#i, j, \n",
    "                    ageI, ageJ, sexI, sexJ,\n",
    "                    OpinionBeforeI, OpinionAfterI, OpinionBeforeJ, OpinionAfterJ, \n",
    "                    #NewFolloweesNumI + DeletedFolloweesNumI, NewFolloweesNumJ + DeletedFolloweesNumJ,\n",
    "                    #X['op_after'][i],\n",
    "                    FriendsBeforeNumI, FriendsBeforeNumJ, FolloweesBeforeNumI, FolloweesBeforeNumJ, \n",
    "                    #new_connections_number_i, X['new_connections_number'][j], \n",
    "                    #deleted_connections_number_i, X['deleted_connections_number'][j],\n",
    "                    CommFriendsBefore, \n",
    "                    #CommFriendsAfter, \n",
    "                    CommFolloweesBefore, \n",
    "                    #CommFolloweesAfter,  \n",
    "                    #comm_fr_before_i_after_j, \n",
    "                    #comm_fr_after_i_before_j, \n",
    "                    #comm_fr_after,\n",
    "                    target]\n",
    "        \n",
    "        observations.append(new_item)\n",
    "        \n",
    "        if (counter % 50000000) == 0:\n",
    "            \n",
    "            observations = pd.DataFrame(observations, columns=ColumnsNames)\n",
    "            \n",
    "            observations.to_csv(f'{folder}/ObservationsGiant({iteration}).csv', index=False)\n",
    "            \n",
    "            iteration += 1\n",
    "            \n",
    "            observations = []\n",
    "        \n",
    "        #observations.loc[observations.shape[0]] = new_item\n",
    "        \n",
    "        \n",
    "        \n",
    "observations = pd.DataFrame(observations, columns=ColumnsNames)  \n",
    "\n",
    "observations.to_csv(f'{folder}/ObservationsGiant({iteration}).csv', index=False)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1c69b3f9-c487-4864-9d4f-4604d95c8d1d",
   "metadata": {},
   "source": [
    "# Prepare vectors instead of one giant table - this approach needs less memory resources "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "id": "58a99182-38ef-4880-bd8c-0f841ec8c585",
   "metadata": {},
   "outputs": [],
   "source": [
    "ColumnsNames = ['ageI', 'ageJ', 'sexI', 'sexJ',\n",
    "                'OpinionBeforeI', 'OpinionAfterI', 'OpinionBeforeJ', 'OpinionAfterJ', \n",
    "                #'FolloweesChangesI', 'FolloweesChangesJ',\n",
    "                'FriendsBeforeNumI', 'FriendsBeforeNumJ', 'FolloweesBeforeNumI', 'FolloweesBeforeNumJ', \n",
    "                'CommFriendsBefore', \n",
    "                #'CommFriendsAfter', \n",
    "                'CommFolloweesBefore', \n",
    "                #'CommFolloweesAfter',\n",
    "                'Target']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "id": "9129b3d3-e196-4b91-9d84-229b75079d14",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "84b27d9a012a4988a43c2a298c41587f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/4 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 ...\n",
      "2 ...\n",
      "3 ...\n",
      "4 ...\n",
      "(176917455, 8)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>OpinionBeforeI</th>\n",
       "      <th>OpinionAfterI</th>\n",
       "      <th>OpinionBeforeJ</th>\n",
       "      <th>OpinionAfterJ</th>\n",
       "      <th>FriendsBeforeNumI</th>\n",
       "      <th>FriendsBeforeNumJ</th>\n",
       "      <th>CommFriendsBefore</th>\n",
       "      <th>Target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.6</td>\n",
       "      <td>0.61</td>\n",
       "      <td>0.78</td>\n",
       "      <td>0.78</td>\n",
       "      <td>25</td>\n",
       "      <td>92</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.6</td>\n",
       "      <td>0.61</td>\n",
       "      <td>0.44</td>\n",
       "      <td>0.44</td>\n",
       "      <td>25</td>\n",
       "      <td>20</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.6</td>\n",
       "      <td>0.61</td>\n",
       "      <td>0.51</td>\n",
       "      <td>0.52</td>\n",
       "      <td>25</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.6</td>\n",
       "      <td>0.61</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.50</td>\n",
       "      <td>25</td>\n",
       "      <td>102</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.6</td>\n",
       "      <td>0.61</td>\n",
       "      <td>0.31</td>\n",
       "      <td>0.30</td>\n",
       "      <td>25</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   OpinionBeforeI  OpinionAfterI  OpinionBeforeJ  OpinionAfterJ  \\\n",
       "0             0.6           0.61            0.78           0.78   \n",
       "1             0.6           0.61            0.44           0.44   \n",
       "2             0.6           0.61            0.51           0.52   \n",
       "3             0.6           0.61            0.50           0.50   \n",
       "4             0.6           0.61            0.31           0.30   \n",
       "\n",
       "   FriendsBeforeNumI  FriendsBeforeNumJ  CommFriendsBefore  Target  \n",
       "0                 25                 92                  4       1  \n",
       "1                 25                 20                  1       1  \n",
       "2                 25                  6                  0       1  \n",
       "3                 25                102                  3       1  \n",
       "4                 25                  8                  0       1  "
      ]
     },
     "execution_count": 89,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "columns = [\n",
    "           #'ageI', 'ageJ', \n",
    "           #'sexI', 'sexJ',\n",
    "           'OpinionBeforeI', 'OpinionAfterI', 'OpinionBeforeJ', 'OpinionAfterJ', \n",
    "           'FriendsBeforeNumI', 'FriendsBeforeNumJ', \n",
    "           #'FolloweesBeforeNumI', 'FolloweesBeforeNumJ', \n",
    "           'CommFriendsBefore', \n",
    "           #'CommFolloweesBefore', \n",
    "           'Target',\n",
    "          ]\n",
    "\n",
    "observations = []\n",
    "for iteration in tqdm(range(1, 5)):\n",
    "    print(iteration, '...')\n",
    "    observations.append(pd.read_csv(f'{folder}/ObservationsGiant({iteration}).csv', \n",
    "                                    usecols=columns)[columns])\n",
    "\n",
    "observations = pd.concat(observations, ignore_index=True)\n",
    "\n",
    "print(observations.shape)\n",
    "\n",
    "observations.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "id": "90837890-1ba9-4c5c-82dc-04eb1a17b0ae",
   "metadata": {},
   "outputs": [],
   "source": [
    "np.save(f'{folder}/Vectors/OpinionBeforeI.npy', observations['OpinionBeforeI'])\n",
    "np.save(f'{folder}/Vectors/OpinionAfterI.npy', observations['OpinionAfterI'])\n",
    "np.save(f'{folder}/Vectors/OpinionBeforeJ.npy', observations['OpinionBeforeJ'])\n",
    "np.save(f'{folder}/Vectors/OpinionAfterJ.npy', observations['OpinionAfterJ'])\n",
    "np.save(f'{folder}/Vectors/FriendsBeforeNumI.npy', observations['FriendsBeforeNumI'])\n",
    "np.save(f'{folder}/Vectors/FriendsBeforeNumJ.npy', observations['FriendsBeforeNumJ'])\n",
    "np.save(f'{folder}/Vectors/CommFriendsBefore.npy', observations['CommFriendsBefore'])\n",
    "np.save(f'{folder}/Vectors/Target.npy', observations['Target'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "4cf9f3a2-ec7f-4dc6-8f5a-6db2393ac2b1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "106dacab7e70405cbad72445afcab355",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/4 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 ...\n",
      "2 ...\n",
      "3 ...\n",
      "4 ...\n",
      "(176917455, 7)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ageI</th>\n",
       "      <th>ageJ</th>\n",
       "      <th>sexI</th>\n",
       "      <th>sexJ</th>\n",
       "      <th>FolloweesBeforeNumI</th>\n",
       "      <th>FolloweesBeforeNumJ</th>\n",
       "      <th>CommFolloweesBefore</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>47</td>\n",
       "      <td>57</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>65</td>\n",
       "      <td>91</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>47</td>\n",
       "      <td>37</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>65</td>\n",
       "      <td>52</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>47</td>\n",
       "      <td>33</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>65</td>\n",
       "      <td>38</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>47</td>\n",
       "      <td>46</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>65</td>\n",
       "      <td>18</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>47</td>\n",
       "      <td>36</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>65</td>\n",
       "      <td>24</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   ageI  ageJ  sexI  sexJ  FolloweesBeforeNumI  FolloweesBeforeNumJ  \\\n",
       "0    47    57     2     2                   65                   91   \n",
       "1    47    37     2     2                   65                   52   \n",
       "2    47    33     2     1                   65                   38   \n",
       "3    47    46     2     2                   65                   18   \n",
       "4    47    36     2     1                   65                   24   \n",
       "\n",
       "   CommFolloweesBefore  \n",
       "0                    6  \n",
       "1                    0  \n",
       "2                    0  \n",
       "3                    0  \n",
       "4                    0  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "\n",
    "columns = [\n",
    "           'ageI', 'ageJ', \n",
    "           'sexI', 'sexJ',\n",
    "           #'OpinionBeforeI', 'OpinionAfterI', 'OpinionBeforeJ', 'OpinionAfterJ', \n",
    "           #'FriendsBeforeNumI', 'FriendsBeforeNumJ', \n",
    "           'FolloweesBeforeNumI', 'FolloweesBeforeNumJ', \n",
    "           #'CommFriendsBefore', \n",
    "           'CommFolloweesBefore', \n",
    "           #'Target',\n",
    "          ]\n",
    "\n",
    "observations = []\n",
    "for iteration in tqdm(range(1, 5)):\n",
    "    print(iteration, '...')\n",
    "    observations.append(pd.read_csv(f'{folder}/ObservationsGiant({iteration}).csv', \n",
    "                                    usecols=columns)[columns])\n",
    "\n",
    "observations = pd.concat(observations, ignore_index=True)\n",
    "\n",
    "print(observations.shape)\n",
    "\n",
    "observations.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "55df5713-4fa3-4e85-a26a-23bb0b8b2f75",
   "metadata": {},
   "outputs": [],
   "source": [
    "np.save(f'{folder}/Vectors/ageI.npy', observations['ageI'])\n",
    "np.save(f'{folder}/Vectors/ageJ.npy', observations['ageJ'])\n",
    "np.save(f'{folder}/Vectors/sexI.npy', observations['sexI'])\n",
    "np.save(f'{folder}/Vectors/sexJ.npy', observations['sexJ'])\n",
    "np.save(f'{folder}/Vectors/FolloweesBeforeNumI.npy', observations['FolloweesBeforeNumI'])\n",
    "np.save(f'{folder}/Vectors/FolloweesBeforeNumJ.npy', observations['FolloweesBeforeNumJ'])\n",
    "np.save(f'{folder}/Vectors/CommFolloweesBefore.npy', observations['CommFolloweesBefore'])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c29a8a19-3bdf-4e40-af49-68b5b28dc425",
   "metadata": {},
   "source": [
    "# Set thresholds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "44cea11c-9bc9-454c-bb21-999090cdae85",
   "metadata": {},
   "outputs": [],
   "source": [
    "FrNumberThr = [0, 8, 17, 100500]\n",
    "\n",
    "OpinionThr = [0, 1/3, 2/3, 1]\n",
    "\n",
    "OpinionDiffThr = [0, 0.25, 0.5, 0.75, 1]\n",
    "\n",
    "age_thresholds = [0, 31, 40, 100]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a6f5b254-f843-495f-9ecd-1e30e760d24c",
   "metadata": {},
   "source": [
    "# Prepare masks - they will be used in calculating probabilities of tie appearing and tie removing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "599c0a17-fb03-4846-bba2-fe2940b6783d",
   "metadata": {},
   "outputs": [],
   "source": [
    "CommFriendsBefore = np.load(f'{folder}/Vectors/CommFriendsBefore.npy')\n",
    "\n",
    "OpinionBeforeI = np.load(f'{folder}/Vectors/OpinionBeforeI.npy')\n",
    "OpinionBeforeJ = np.load(f'{folder}/Vectors/OpinionBeforeJ.npy')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "f6e6b88b-b58f-40a5-99c4-1b691b012822",
   "metadata": {},
   "outputs": [],
   "source": [
    "MaskTwoMoreCommFr = CommFriendsBefore >= 2\n",
    "np.save(f'{folder}/Masks/MaskTwoMoreCommFr.npy', MaskTwoMoreCommFr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "f5dc6cc3-0895-42ac-8cfe-a9db547a5d69",
   "metadata": {},
   "outputs": [],
   "source": [
    "OpDiffBefore = abs(OpinionBeforeI - OpinionBeforeJ)\n",
    "\n",
    "MaskSmallDiff = (OpDiffBefore < OpinionDiffThr[1])\n",
    "MaskAvgDiff = (OpinionDiffThr[1] <= OpDiffBefore) & (OpDiffBefore < OpinionDiffThr[2])\n",
    "MaskLargeDiff = (OpinionDiffThr[2] <= OpDiffBefore) & (OpDiffBefore < OpinionDiffThr[3])\n",
    "MaskHugeDiff = (OpinionDiffThr[3] <= OpDiffBefore)\n",
    "\n",
    "np.save(f'{folder}/Masks/MaskSmallDiff.npy', MaskSmallDiff)\n",
    "np.save(f'{folder}/Masks/MaskAvgDiff.npy', MaskAvgDiff)\n",
    "np.save(f'{folder}/Masks/MaskLargeDiff.npy', MaskLargeDiff)\n",
    "np.save(f'{folder}/Masks/MaskHugeDiff.npy', MaskHugeDiff)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "e15863d2-a423-41fa-955d-b7110905d1d1",
   "metadata": {},
   "outputs": [],
   "source": [
    "##############################################################################################################\n",
    "\n",
    "MaskZeroCommFollowees = CommFolloweesBefore == 0\n",
    "MaskOneMoreCommFollowees = CommFolloweesBefore >= 1\n",
    "\n",
    "np.save(f'{folder}/Masks/MaskZeroCommFollowees.npy', MaskZeroCommFollowees)\n",
    "np.save(f'{folder}/Masks/MaskOneMoreCommFollowees.npy', MaskOneMoreCommFollowees)\n",
    "\n",
    "del(MaskZeroCommFollowees, MaskOneMoreCommFollowees)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "c111db81-2487-4ba9-ad66-1e32f0ae6408",
   "metadata": {},
   "outputs": [],
   "source": [
    "OpinionBeforeI = np.load(f'{folder}/Vectors/OpinionBeforeI.npy')\n",
    "OpinionAfterI = np.load(f'{folder}/Vectors/OpinionAfterI.npy')\n",
    "\n",
    "OpinionBeforeJ = np.load(f'{folder}/Vectors/OpinionBeforeJ.npy')\n",
    "OpinionAfterJ = np.load(f'{folder}/Vectors/OpinionAfterJ.npy')\n",
    "\n",
    "FriendsBeforeNumI = np.load(f'{folder}/Vectors/FriendsBeforeNumI.npy')\n",
    "FriendsBeforeNumJ = np.load(f'{folder}/Vectors/FriendsBeforeNumJ.npy')\n",
    "\n",
    "CommFriendsBefore = np.load(f'{folder}/Vectors/CommFriendsBefore.npy')\n",
    "\n",
    "ageI = np.load(f'{folder}/Vectors/ageI.npy')\n",
    "ageJ = np.load(f'{folder}/Vectors/ageJ.npy')\n",
    "\n",
    "sexI = np.load(f'{folder}/Vectors/sexI.npy')\n",
    "sexJ = np.load(f'{folder}/Vectors/sexJ.npy')\n",
    "\n",
    "FolloweesBeforeNumI = np.load(f'{folder}/Vectors/FolloweesBeforeNumI.npy')\n",
    "FolloweesBeforeNumJ = np.load(f'{folder}/Vectors/FolloweesBeforeNumJ.npy')\n",
    "\n",
    "CommFolloweesBefore = np.load(f'{folder}/Vectors/CommFolloweesBefore.npy')\n",
    "\n",
    "Target = np.load(f'{folder}/Vectors/Target.npy')\n",
    "\n",
    "\n",
    "############################################################################################################\n",
    "\n",
    "MaskZeroCommFr = CommFriendsBefore == 0\n",
    "MaskOneCommFr = CommFriendsBefore == 1\n",
    "MaskTwoCommFr = CommFriendsBefore == 2\n",
    "MaskThreeMoreCommFr = CommFriendsBefore >= 3\n",
    "MaskOneMoreCommFr = CommFriendsBefore >= 1\n",
    "\n",
    "np.save(f'{folder}/Masks/MaskZeroCommFr.npy', MaskZeroCommFr)\n",
    "np.save(f'{folder}/Masks/MaskOneCommFr.npy', MaskOneCommFr)\n",
    "np.save(f'{folder}/Masks/MaskTwoCommFr.npy', MaskTwoCommFr)\n",
    "np.save(f'{folder}/Masks/MaskThreeMoreCommFr.npy', MaskThreeMoreCommFr)\n",
    "np.save(f'{folder}/Masks/MaskOneMoreCommFr.npy', MaskOneMoreCommFr)\n",
    "\n",
    "del(MaskZeroCommFr, MaskOneCommFr, MaskTwoCommFr, MaskThreeMoreCommFr, MaskOneMoreCommFr)\n",
    "\n",
    "############################################################################################################\n",
    "\n",
    "MaskIFew = (FriendsBeforeNumI < FrNumberThr[1])\n",
    "MaskIAvg = (FriendsBeforeNumI >= FrNumberThr[1]) & (FriendsBeforeNumI <= FrNumberThr[2])\n",
    "MaskIMany = (FriendsBeforeNumI > FrNumberThr[2])\n",
    "\n",
    "MaskJFew = (FriendsBeforeNumJ < FrNumberThr[1])\n",
    "MaskJAvg = (FriendsBeforeNumJ >= FrNumberThr[1]) & (FriendsBeforeNumJ <= FrNumberThr[2])\n",
    "MaskJMany = (FriendsBeforeNumJ > FrNumberThr[2])\n",
    "\n",
    "MaskFewFew = MaskIFew & MaskJFew\n",
    "\n",
    "MaskFewAvg1 = MaskIFew & MaskJAvg\n",
    "MaskFewAvg2 = MaskIAvg & MaskJFew\n",
    "MaskFewAvg = MaskFewAvg1 | MaskFewAvg2\n",
    "\n",
    "MaskFewMany1 = MaskIFew & MaskJMany\n",
    "MaskFewMany2 = MaskIMany & MaskJFew\n",
    "MaskFewMany = MaskFewMany1 | MaskFewMany2\n",
    "\n",
    "MaskAvgAvg = MaskIAvg & MaskJAvg\n",
    "\n",
    "MaskAvgMany1 = MaskIAvg & MaskJMany\n",
    "MaskAvgMany2 = MaskIMany & MaskJAvg\n",
    "MaskAvgMany = MaskAvgMany1 | MaskJMany\n",
    "\n",
    "\n",
    "MaskManyMany = MaskIMany & MaskJMany\n",
    "\n",
    "del(MaskIFew, MaskIAvg, MaskIMany, MaskJFew, MaskJAvg, MaskJMany)\n",
    "\n",
    "\n",
    "np.save(f'{folder}/Masks/MaskFewFew.npy', MaskFewFew)\n",
    "np.save(f'{folder}/Masks/MaskFewAvg.npy', MaskFewAvg)\n",
    "np.save(f'{folder}/Masks/MaskFewMany.npy', MaskFewMany)\n",
    "np.save(f'{folder}/Masks/MaskAvgAvg.npy', MaskAvgAvg)\n",
    "np.save(f'{folder}/Masks/MaskAvgMany.npy', MaskAvgMany)\n",
    "np.save(f'{folder}/Masks/MaskManyMany.npy', MaskManyMany)\n",
    "\n",
    "del(MaskFewFew, MaskFewAvg, MaskFewMany, MaskAvgAvg, MaskAvgMany, MaskManyMany)\n",
    "\n",
    "############################################################################################################\n",
    "\n",
    "\n",
    "\n",
    "MaskICons = (OpinionBeforeI < OpinionThr[1])\n",
    "MaskIMod = (OpinionBeforeI >= OpinionThr[1]) & (OpinionBeforeI <= OpinionThr[2])\n",
    "MaskILib = (OpinionBeforeI > OpinionThr[2])\n",
    "\n",
    "MaskJCons = (OpinionBeforeJ < OpinionThr[1])\n",
    "MaskJMod = (OpinionBeforeJ >= OpinionThr[1]) & (OpinionBeforeJ <= OpinionThr[2])\n",
    "MaskJLib = (OpinionBeforeJ > OpinionThr[2])\n",
    "\n",
    "MaskConsCons = MaskICons & MaskJCons\n",
    "\n",
    "MaskConsMod1 = MaskICons & MaskJMod\n",
    "MaskConsMod2 = MaskIMod & MaskJCons\n",
    "MaskConsMod = MaskConsMod1 | MaskConsMod2\n",
    "\n",
    "MaskConsLib1 = MaskICons & MaskJLib\n",
    "MaskConsLib2 = MaskILib & MaskJCons\n",
    "MaskConsLib = MaskConsLib1 | MaskConsLib2\n",
    "\n",
    "MaskModMod = MaskIMod & MaskJMod\n",
    "\n",
    "MaskModLib1 = MaskIMod & MaskJLib\n",
    "MaskModLib2 = MaskILib & MaskJMod\n",
    "MaskModLib = MaskModLib1 | MaskModLib2\n",
    "\n",
    "\n",
    "MaskLibLib = MaskILib & MaskJLib\n",
    "\n",
    "del(MaskICons, MaskIMod, MaskILib, MaskJCons, MaskJMod, MaskJLib)\n",
    "\n",
    "#observations.drop(['OpinionBeforeI', 'OpinionBeforeJ'], axis=1, inplace=True)\n",
    "\n",
    "np.save(f'{folder}/Masks/MaskConsCons.npy', MaskConsCons)\n",
    "np.save(f'{folder}/Masks/MaskConsMod.npy', MaskConsMod)\n",
    "np.save(f'{folder}/Masks/MaskConsLib.npy', MaskConsLib)\n",
    "np.save(f'{folder}/Masks/MaskModMod.npy', MaskModMod)\n",
    "np.save(f'{folder}/Masks/MaskModLib.npy', MaskModLib)\n",
    "np.save(f'{folder}/Masks/MaskLibLib.npy', MaskLibLib)\n",
    "\n",
    "del(MaskConsCons, MaskConsMod, MaskConsLib, MaskModMod, MaskModLib, MaskLibLib)\n",
    "\n",
    "############################################################################################################\n",
    "\n",
    "MaskStaticOpinions = (abs(OpinionAfterI - OpinionBeforeI) < 0.05) & (abs(OpinionAfterJ - OpinionBeforeJ) < 0.05) \n",
    "\n",
    "#observations.drop(['OpinionAfterI', 'OpinionBeforeI', 'OpinionAfterJ', 'OpinionBeforeJ'], axis=1, inplace=True)\n",
    "\n",
    "np.save(f'{folder}/Masks/MaskStaticOpinions.npy', MaskStaticOpinions)\n",
    "\n",
    "del(MaskStaticOpinions)\n",
    "\n",
    "##############################################################################################################\n",
    "\n",
    "\n",
    "np.save(f'{folder}/Masks/Target.npy', Target)\n",
    "\n",
    "\n",
    "\n",
    "############################################################################################################\n",
    "\n",
    "\n",
    "MaskYoungI = (ageI < age_thresholds[1])\n",
    "MaskMiddleI = (ageI >= age_thresholds[1]) & (ageI <= age_thresholds[2])\n",
    "MaskOldI = (ageI > age_thresholds[2])\n",
    "\n",
    "MaskYoungJ = (ageJ < age_thresholds[1])\n",
    "MaskMiddleJ = (ageJ >= age_thresholds[1]) & (ageJ <= age_thresholds[2])\n",
    "MaskOldJ = (ageJ > age_thresholds[2])\n",
    "\n",
    "MaskYoungYoung = MaskYoungI & MaskYoungJ\n",
    "\n",
    "MaskYoungMiddle1 = MaskYoungI & MaskMiddleJ\n",
    "MaskYoungMiddle1 = MaskMiddleI & MaskYoungJ\n",
    "MaskYoungMiddle = MaskYoungMiddle1 | MaskYoungMiddle1\n",
    "\n",
    "MaskYoungOld1 = MaskYoungI & MaskOldJ\n",
    "MaskYoungOld2 = MaskOldI & MaskYoungJ\n",
    "MaskYoungOld = MaskYoungOld1 | MaskYoungOld2\n",
    "\n",
    "MaskMiddleMiddle = MaskMiddleI & MaskMiddleJ\n",
    "\n",
    "MaskMiddleOld1 = MaskMiddleI & MaskOldJ\n",
    "MaskMiddleOld2 = MaskOldI & MaskMiddleJ\n",
    "MaskMiddleOld = MaskMiddleOld1 | MaskMiddleOld2\n",
    "\n",
    "MaskOldOld = MaskOldI & MaskOldJ\n",
    "\n",
    "del(MaskYoungI, MaskMiddleI, MaskOldI, MaskYoungJ, MaskMiddleJ, MaskOldJ)\n",
    "\n",
    "np.save(f'{folder}/Masks/MaskYoungYoung.npy', MaskYoungYoung)\n",
    "np.save(f'{folder}/Masks/MaskYoungMiddle.npy', MaskYoungMiddle)\n",
    "np.save(f'{folder}/Masks/MaskYoungOld.npy', MaskYoungOld)\n",
    "np.save(f'{folder}/Masks/MaskMiddleMiddle.npy', MaskMiddleMiddle)\n",
    "np.save(f'{folder}/Masks/MaskMiddleOld.npy', MaskMiddleOld)\n",
    "np.save(f'{folder}/Masks/MaskOldOld.npy', MaskOldOld)\n",
    "\n",
    "del(MaskYoungYoung, MaskYoungMiddle, MaskYoungOld, MaskMiddleMiddle, MaskMiddleOld, MaskOldOld)\n",
    "\n",
    "############################################################################################################\n",
    "\n",
    "MaskMaleI = (sexI == 2)\n",
    "MaskFemaleI = (sexI == 1) \n",
    "\n",
    "MaskMaleJ = (sexJ == 2)\n",
    "MaskFemaleJ = (sexJ == 1) \n",
    "\n",
    "MaskMaleMale= MaskMaleI & MaskMaleJ\n",
    "\n",
    "MaskMaleFemale1 = MaskMaleI & MaskFemaleJ\n",
    "MaskMaleFemale2 = MaskFemaleI & MaskMaleJ\n",
    "MaskMaleFemale = MaskMaleFemale1 | MaskMaleFemale2\n",
    "\n",
    "MaskFemaleFemale = MaskFemaleI & MaskFemaleJ\n",
    "\n",
    "del(MaskMaleI, MaskFemaleI, MaskMaleJ, MaskFemaleJ)\n",
    "\n",
    "observations.drop(['sexI', 'sexJ'], axis=1, inplace=True)\n",
    "\n",
    "np.save(f'{folder}/Masks/MaskMaleMale.npy', MaskMaleMale)\n",
    "np.save(f'{folder}/Masks/MaskMaleFemale.npy', MaskMaleFemale)\n",
    "np.save(f'{folder}/Masks/MaskFemaleFemale.npy', MaskFemaleFemale)\n",
    "\n",
    "del(MaskMaleMale, MaskMaleFemale, MaskFemaleFemale)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
